/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    RandomSearch.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.util.BitSet;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;

/**
 * <!-- globalinfo-start --> RandomSearch : <br/>
 * <br/>
 * Performs a Random search in the space of attribute subsets. If no start set
 * is supplied, Random search starts from a random point and reports the best
 * subset found. If a start set is supplied, Random searches randomly for
 * subsets that are as good or better than the start point with the same or or
 * fewer attributes. Using RandomSearch in conjunction with a start set
 * containing all attributes equates to the LVF algorithm of Liu and Setiono
 * (ICML-96).<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * H. Liu, R. Setiono: A probabilistic approach to feature selection - A filter
 * solution. In: 13th International Conference on Machine Learning, 319-327,
 * 1996.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- technical-bibtex-start --> BibTeX:
 * 
 * <pre>
 * &#64;inproceedings{Liu1996,
 *    author = {H. Liu and R. Setiono},
 *    booktitle = {13th International Conference on Machine Learning},
 *    pages = {319-327},
 *    title = {A probabilistic approach to feature selection - A filter solution},
 *    year = {1996}
 * }
 * </pre>
 * <p/>
 * <!-- technical-bibtex-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -P &lt;start set&gt;
 *  Specify a starting set of attributes.
 *  Eg. 1,3,5-7.
 *  If a start point is supplied,
 *  random search evaluates the start
 *  point and then randomly looks for
 *  subsets that are as good as or better
 *  than the start point with the same
 *  or lower cardinality.
 * </pre>
 * 
 * <pre>
 * -F &lt;percent&gt; 
 *  Percent of search space to consider.
 *  (default = 25%).
 * </pre>
 * 
 * <pre>
 * -V
 *  Output subsets as the search progresses.
 *  (default = false).
 * </pre>
 * 
 * <pre>
 * -seed &lt;num&gt;
 *  Random seed
 *  (default = 1)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class RandomSearch extends ASSearch implements StartSetHandler,
  OptionHandler, TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = 7479392617377425484L;

  /**
   * holds a starting set as an array of attributes.
   */
  private int[] m_starting;

  /** holds the start set as a range */
  private Range m_startRange;

  /** the best feature set found during the search */
  private BitSet m_bestGroup;

  /** the merit of the best subset found */
  private double m_bestMerit;

  /**
   * only accept a feature set as being "better" than the best if its merit is
   * better or equal to the best, and it contains as many or fewer features than
   * the best (this allows LVF to be implemented).
   */
  private boolean m_onlyConsiderBetterAndSmaller;

  /** does the data have a class */
  private boolean m_hasClass;

  /** holds the class index */
  private int m_classIndex;

  /** number of attributes in the data */
  private int m_numAttribs;

  /** seed for random number generation */
  private int m_seed;

  /** percentage of the search space to consider */
  private double m_searchSize;

  /** the number of iterations performed */
  private int m_iterations;

  /** random number object */
  private Random m_random;

  /** output new best subsets as the search progresses */
  private boolean m_verbose;

  /**
   * Returns a string describing this search method
   * 
   * @return a description of the search suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    return "RandomSearch : \n\nPerforms a Random search in "
      + "the space of attribute subsets. If no start set is supplied, Random "
      + "search starts from a random point and reports the best subset found. "
      + "If a start set is supplied, Random searches randomly for subsets "
      + "that are as good or better than the start point with the same or "
      + "or fewer attributes. Using RandomSearch in conjunction with a start "
      + "set containing all attributes equates to the LVF algorithm of Liu "
      + "and Setiono (ICML-96).\n\n" + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing detailed
   * information about the technical background of this class, e.g., paper
   * reference or book this class is based on.
   * 
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.INPROCEEDINGS);
    result.setValue(Field.AUTHOR, "H. Liu and R. Setiono");
    result.setValue(Field.TITLE,
      "A probabilistic approach to feature selection - A filter solution");
    result.setValue(Field.BOOKTITLE,
      "13th International Conference on Machine Learning");
    result.setValue(Field.YEAR, "1996");
    result.setValue(Field.PAGES, "319-327");

    return result;
  }

  /**
   * Constructor
   */
  public RandomSearch() {
    resetOptions();
  }

  /**
   * Returns an enumeration describing the available options.
   * 
   * @return an enumeration of all the available options.
   **/
  @Override
  public Enumeration<Option> listOptions() {
    Vector<Option> newVector = new Vector<Option>(4);

    newVector.addElement(new Option("\tSpecify a starting set of attributes."
      + "\n\tEg. 1,3,5-7." + "\n\tIf a start point is supplied,"
      + "\n\trandom search evaluates the start"
      + "\n\tpoint and then randomly looks for"
      + "\n\tsubsets that are as good as or better"
      + "\n\tthan the start point with the same" + "\n\tor lower cardinality.",
      "P", 1, "-P <start set>"));

    newVector.addElement(new Option("\tPercent of search space to consider."
      + "\n\t(default = 25%).", "F", 1, "-F <percent> "));
    newVector.addElement(new Option(
      "\tOutput subsets as the search progresses." + "\n\t(default = false).",
      "V", 0, "-V"));

    newVector.addElement(new Option("\tRandom seed\n\t(default = 1)", "seed",
      1, "-seed <num>"));
    return newVector.elements();
  }

  /**
   * Parses a given list of options.
   * <p/>
   * 
   * <!-- options-start --> Valid options are:
   * <p/>
   * 
   * <pre>
   * -P &lt;start set&gt;
   *  Specify a starting set of attributes.
   *  Eg. 1,3,5-7.
   *  If a start point is supplied,
   *  random search evaluates the start
   *  point and then randomly looks for
   *  subsets that are as good as or better
   *  than the start point with the same
   *  or lower cardinality.
   * </pre>
   * 
   * <pre>
   * -F &lt;percent&gt; 
   *  Percent of search space to consider.
   *  (default = 25%).
   * </pre>
   * 
   * <pre>
   * -V
   *  Output subsets as the search progresses.
   *  (default = false).
   * </pre>
   * 
   * <!-- options-end -->
   * 
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   * 
   **/
  @Override
  public void setOptions(String[] options) throws Exception {
    String optionString;
    resetOptions();

    optionString = Utils.getOption('P', options);
    if (optionString.length() != 0) {
      setStartSet(optionString);
    }

    optionString = Utils.getOption('F', options);
    if (optionString.length() != 0) {
      setSearchPercent((new Double(optionString)).doubleValue());
    }

    optionString = Utils.getOption("seed", options);
    if (optionString.length() > 0) {
      setSeed(Integer.parseInt(optionString));
    }

    setVerbose(Utils.getFlag('V', options));

    Utils.checkForRemainingOptions(options);
  }

  /**
   * Gets the current settings of RandomSearch.
   * 
   * @return an array of strings suitable for passing to setOptions()
   */
  @Override
  public String[] getOptions() {

    Vector<String> options = new Vector<String>();

    if (m_verbose) {
      options.add("-V");
    }

    if (!(getStartSet().equals(""))) {
      options.add("-P");
      options.add("" + startSetToString());
    }

    options.add("-F");
    options.add("" + getSearchPercent());

    options.add("-seed");
    options.add("" + getSeed());

    return options.toArray(new String[0]);
  }

  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String startSetTipText() {
    return "Set the start point for the search. This is specified as a comma "
      + "seperated list off attribute indexes starting at 1. It can include "
      + "ranges. Eg. 1,2,5-9,17. If specified, Random searches for subsets "
      + "of attributes that are as good as or better than the start set with "
      + "the same or lower cardinality.";
  }

  /**
   * Sets a starting set of attributes for the search. It is the search method's
   * responsibility to report this start set (if any) in its toString() method.
   * 
   * @param startSet a string containing a list of attributes (and or ranges),
   *          eg. 1,2,6,10-15. "" indicates no start point. If a start point is
   *          supplied, random search evaluates the start point and then looks
   *          for subsets that are as good as or better than the start point
   *          with the same or lower cardinality.
   * @throws Exception if start set can't be set.
   */
  @Override
  public void setStartSet(String startSet) throws Exception {
    m_startRange.setRanges(startSet);
  }

  /**
   * Returns a list of attributes (and or attribute ranges) as a String
   * 
   * @return a list of attributes (and or attribute ranges)
   */
  @Override
  public String getStartSet() {
    return m_startRange.getRanges();
  }

  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String verboseTipText() {
    return "Print progress information. Sends progress info to the terminal "
      + "as the search progresses.";
  }

  /**
   * set whether or not to output new best subsets as the search proceeds
   * 
   * @param v true if output is to be verbose
   */
  public void setVerbose(boolean v) {
    m_verbose = v;
  }

  /**
   * get whether or not output is verbose
   * 
   * @return true if output is set to verbose
   */
  public boolean getVerbose() {
    return m_verbose;
  }

  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String searchPercentTipText() {
    return "Percentage of the search space to explore.";
  }

  /**
   * set the percentage of the search space to consider
   * 
   * @param p percent of the search space ( 0 < p <= 100)
   */
  public void setSearchPercent(double p) {
    p = Math.abs(p);
    if (p == 0) {
      p = 25;
    }

    if (p > 100.0) {
      p = 100;
    }

    m_searchSize = (p / 100.0);
  }

  /**
   * get the percentage of the search space to consider
   * 
   * @return the percent of the search space explored
   */
  public double getSearchPercent() {
    return m_searchSize * 100;
  }

  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String seedTipText() {
    return "Seed for the random number generator";
  }

  public void setSeed(int seed) {
    m_seed = seed;
  }

  public int getSeed() {
    return m_seed;
  }

  /**
   * converts the array of starting attributes to a string. This is used by
   * getOptions to return the actual attributes specified as the starting set.
   * This is better than using m_startRanges.getRanges() as the same start set
   * can be specified in different ways from the command line---eg 1,2,3 == 1-3.
   * This is to ensure that stuff that is stored in a database is comparable.
   * 
   * @return a comma seperated list of individual attribute numbers as a String
   */
  private String startSetToString() {
    StringBuffer FString = new StringBuffer();
    boolean didPrint;

    if (m_starting == null) {
      return getStartSet();
    }

    for (int i = 0; i < m_starting.length; i++) {
      didPrint = false;

      if ((m_hasClass == false) || (m_hasClass == true && i != m_classIndex)) {
        FString.append((m_starting[i] + 1));
        didPrint = true;
      }

      if (i == (m_starting.length - 1)) {
        FString.append("");
      } else {
        if (didPrint) {
          FString.append(",");
        }
      }
    }

    return FString.toString();
  }

  /**
   * prints a description of the search
   * 
   * @return a description of the search as a string
   */
  @Override
  public String toString() {
    StringBuffer text = new StringBuffer();

    text.append("\tRandom search.\n\tStart set: ");
    if (m_starting == null) {
      text.append("no attributes\n");
    } else {
      text.append(startSetToString() + "\n");
    }
    text.append("\tNumber of iterations: " + m_iterations + " ("
      + (m_searchSize * 100.0) + "% of the search space)\n");
    text.append("\tMerit of best subset found: "
      + Utils.doubleToString(Math.abs(m_bestMerit), 8, 3) + "\n");

    return text.toString();
  }

  /**
   * Searches the attribute subset space randomly.
   * 
   * @param ASEval the attribute evaluator to guide the search
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @throws Exception if the search can't be completed
   */
  @Override
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
    double best_merit;
    int sizeOfBest = m_numAttribs;
    BitSet temp;
    m_bestGroup = new BitSet(m_numAttribs);

    m_onlyConsiderBetterAndSmaller = false;
    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName() + " is not a "
        + "Subset evaluator!");
    }

    m_random = new Random(m_seed);

    if (ASEval instanceof UnsupervisedSubsetEvaluator) {
      m_hasClass = false;
    } else {
      m_hasClass = true;
      m_classIndex = data.classIndex();
    }

    SubsetEvaluator ASEvaluator = (SubsetEvaluator) ASEval;
    m_numAttribs = data.numAttributes();

    m_startRange.setUpper(m_numAttribs - 1);
    if (!(getStartSet().equals(""))) {
      m_starting = m_startRange.getSelection();
    }

    // If a starting subset has been supplied, then initialise the bitset
    if (m_starting != null) {
      for (int element : m_starting) {
        if ((element) != m_classIndex) {
          m_bestGroup.set(element);
        }
      }
      m_onlyConsiderBetterAndSmaller = true;
      best_merit = ASEvaluator.evaluateSubset(m_bestGroup);
      sizeOfBest = countFeatures(m_bestGroup);
    } else {
      // do initial random subset
      m_bestGroup = generateRandomSubset();
      best_merit = ASEvaluator.evaluateSubset(m_bestGroup);
    }

    if (m_verbose) {
      System.out.println("Initial subset ("
        + Utils.doubleToString(Math.abs(best_merit), 8, 5) + "): "
        + printSubset(m_bestGroup));
    }

    int i;
    if (m_hasClass) {
      i = m_numAttribs - 1;
    } else {
      i = m_numAttribs;
    }
    m_iterations = (int) ((m_searchSize * Math.pow(2, i)));

    int tempSize;
    double tempMerit;
    // main loop
    for (i = 0; i < m_iterations; i++) {
      temp = generateRandomSubset();
      if (m_onlyConsiderBetterAndSmaller) {
        tempSize = countFeatures(temp);
        if (tempSize <= sizeOfBest) {
          tempMerit = ASEvaluator.evaluateSubset(temp);
          if (tempMerit >= best_merit) {
            sizeOfBest = tempSize;
            m_bestGroup = temp;
            best_merit = tempMerit;
            if (m_verbose) {
              System.out.print("New best subset ("
                + Utils.doubleToString(Math.abs(best_merit), 8, 5) + "): "
                + printSubset(m_bestGroup) + " :");
              System.out.println(Utils.doubleToString((((double) i)
                / ((double) m_iterations) * 100.0), 5, 1)
                + "% done");
            }
          }
        }
      } else {
        tempMerit = ASEvaluator.evaluateSubset(temp);
        if (tempMerit > best_merit) {
          m_bestGroup = temp;
          best_merit = tempMerit;
          if (m_verbose) {
            System.out.print("New best subset ("
              + Utils.doubleToString(Math.abs(best_merit), 8, 5) + "): "
              + printSubset(m_bestGroup) + " :");
            System.out.println(Utils.doubleToString((((double) i)
              / ((double) m_iterations) * 100.0), 5, 1)
              + "% done");
          }
        }
      }
    }
    m_bestMerit = best_merit;
    return attributeList(m_bestGroup);
  }

  /**
   * prints a subset as a series of attribute numbers
   * 
   * @param temp the subset to print
   * @return a subset as a String of attribute numbers
   */
  private String printSubset(BitSet temp) {
    StringBuffer text = new StringBuffer();

    for (int j = 0; j < m_numAttribs; j++) {
      if (temp.get(j)) {
        text.append((j + 1) + " ");
      }
    }
    return text.toString();
  }

  /**
   * converts a BitSet into a list of attribute indexes
   * 
   * @param group the BitSet to convert
   * @return an array of attribute indexes
   **/
  private int[] attributeList(BitSet group) {
    int count = 0;

    // count how many were selected
    for (int i = 0; i < m_numAttribs; i++) {
      if (group.get(i)) {
        count++;
      }
    }

    int[] list = new int[count];
    count = 0;

    for (int i = 0; i < m_numAttribs; i++) {
      if (group.get(i)) {
        list[count++] = i;
      }
    }

    return list;
  }

  /**
   * generates a random subset
   * 
   * @return a random subset as a BitSet
   */
  private BitSet generateRandomSubset() {
    BitSet temp = new BitSet(m_numAttribs);
    double r;

    for (int i = 0; i < m_numAttribs; i++) {
      r = m_random.nextDouble();
      if (r <= 0.5) {
        if (m_hasClass && i == m_classIndex) {
        } else {
          temp.set(i);
        }
      }
    }
    return temp;
  }

  /**
   * counts the number of features in a subset
   * 
   * @param featureSet the feature set for which to count the features
   * @return the number of features in the subset
   */
  private int countFeatures(BitSet featureSet) {
    int count = 0;
    for (int i = 0; i < m_numAttribs; i++) {
      if (featureSet.get(i)) {
        count++;
      }
    }
    return count;
  }

  /**
   * resets to defaults
   */
  private void resetOptions() {
    m_starting = null;
    m_startRange = new Range();
    m_searchSize = 0.25;
    m_seed = 1;
    m_onlyConsiderBetterAndSmaller = false;
    m_verbose = false;
  }

  /**
   * Returns the revision string.
   * 
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision$");
  }
}
